library(readr)
library(readxl)
library(openxlsx)
library(tidyr)
library(tidyverse)
library(dplyr)
library(cregg)
library(cjoint)
library(magrittr)
library(estimatr)
library(ggplot2)
library(cowplot)
library(stargazer)
library(lme4)
library(readxl)
library(aod)
library(car)
library(clubSandwich)
library(openxlsx)
library(lmtest)
library(relaimpo)
library(fixest)

#Set Working Directory

#Load Data

Data <- read_excel("GenderSegVET.xlsx")
View(Data)

# Data Prep ...Factorizing and re-leveling variables for presentational resasons
col_names <- names(Data)
Data[,col_names] <- lapply(Data[,col_names] , factor)
Data$Choice <- as.numeric(Data$Choice)
Data$Choice <- ifelse(Data$Choice == 2, 1, ifelse(Data$Choice == 1, 0, Data$Choice))
levels(Data$IT_Reliance)
Data$IT_Reliance <- factor(Data$IT_Reliance, levels = c("Weak", "Strong"))
levels(Data$Creative_Tasks)
Data$Creative_Tasks <- factor(Data$Creative_Tasks, levels = c("Irrelevant", "Central"))
levels(Data$Family_Friendliness)
Data$Family_Friendliness <- factor(Data$Family_Friendliness, levels = c("Full-time", "Part-time"))
Data$Salary_Expectations <- factor(Data$Salary_Expectations, levels = c("Low", "High"))
Data$Meaningful_Work <- factor(Data$Meaningful_Work, levels = c("Unimportant", "Important"))
Data <- Data %>% mutate(Combined_Profile = paste(IT_Reliance, Social_Interactions, Creative_Tasks, Routinized_Tasks, Entrepreneurial_Tasks, Family_Friendliness, Salary_Expectations, Meaningful_Work, sep = "_"))
#View(Data)


### Analysis for the Main Text -> Figures 2 and 3

## Figure 2 : Conjoint - Overall sample

# Determining marginal means

mmoverall <- mm(Data, Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks 
      + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work, id = ~ResponseId, h0 = 0.5) 

#View(mmoverall)

mmsigov <- c('ns', 'ns', '*', '*', '*', '*', '*', '*','ns', 'ns', '*', '*', '*', '*', '*', '*') #added by hand
mmoverall$mmsigov = mmsigov
#View(mmoverall)
print(mmoverall)

# Figure 2 Define the desired order of levels manually
mmoverallplot <- plot(mmoverall, vline = 0.5) +
  ggplot2::geom_point(aes(x = estimate, y = level), size = 1.2) +
  ggplot2::geom_errorbar(aes(xmin = lower, xmax = upper, y = level), 
                         width = 0) +
  ggplot2::geom_text(aes(label = sprintf("%0.2f (%s)", estimate, as.character(mmsigov)),
                         x = estimate, y = level), 
                     colour = "black", size = 3.5, position = position_nudge(y = 0.5)) +
  theme(panel.grid.major = element_line(color = "grey82"),
        legend.position = "none",
        axis.text.y = element_text(color = "black", size = 10),
        axis.ticks.y = element_line(color = "black"))

mmoverallplot

# Figure 2 clean and high res.

Figure2 <- plot(mmoverall, vline = 0.5) +
  # make points a bit larger and give them a stroke
  geom_point(aes(x = estimate, y = level),
             size = 2.5,        # bigger points
             shape = 16,        # filled circle with border
             stroke = 0.8,      # border thickness
  )+
  # thicker error bars
  geom_errorbar(aes(xmin = lower, xmax = upper, y = level),
                width = 0,
                size  = 0.8) +     # error‐bar line thickness
  
  # bigger, bolder text labels, nudged up a bit
  geom_text(aes(label = sprintf("%0.2f (%s)", estimate, as.character(mmsigov)),
                x = estimate, y = level),
            size          = 4.5,      # font size in mm
            fontface      = "bold",   # thick (bold) text
            colour        = "black",
            position      = position_nudge(y = 0.6)) +
  
  # bump up all panel text & tweak grid
  theme_minimal(base_size = 14) +     # sets overall base font size
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    plot.background  = element_rect(fill = "white", colour = NA),
    panel.grid.major      = element_line(color = "grey82"),
    axis.text.y           = element_text(size = 12, face = "bold"),
    axis.text.x           = element_text(size = 12),
    axis.title            = element_text(size = 14, face = "bold"),
    legend.position       = "none"
  )

# preview
print(Figure2)

# saving with explicit dimensions & resolution
ggsave("Figure2.tif",
       plot   = Figure2,
       width  = 6,        # inches
       height = 8,        # inches
       dpi    = 300)



## Figure 3: Conjoint - Gender Subsamples and Differences

# Determining marginal means and differences by Gender

mms1.1 <- cj(Data, Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks 
             + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work, id = ~ResponseId,estimate = "mm", by =~var_gen, alpha = 0.05, h0 = 0.5)

diff_mms1.1 <- cj(Data, Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks 
                  + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work, id = ~ResponseId, estimate = "mm_diff", by =~var_gen, alpha = 0.05, h0 = 0.0)

#View(mms1.1)
#View(diff_mms1.1)

# Assign "*" if mms or diffs are significant at 95%-level

sigmms1.1 <- character(nrow(mms1.1)) 

for (i in 1:nrow(mms1.1)) {
  if (mms1.1$p[i] > 0.05) {
    sigmms1.1[i] <- "ns"  
  } else {
    sigmms1.1[i] <- "*"  
  }
}
View(mms1.1)

mms1.1$sigmms1.1 = sigmms1.1


sigmms1.2 <- character(nrow(diff_mms1.1)) 

for (i in 1:nrow(diff_mms1.1)) {
  if (diff_mms1.1$p[i] > 0.05) {
    sigmms1.2[i] <- "ns"  # Assign "*" if the condition is met
  } else {
    sigmms1.2[i] <- "*"  # Assign "n" if the condition is not met
  }
}


diff_mms1.1$sigmms1.2 = sigmms1.2

View(diff_mms1.1)


# Plot the conjoint analysis differences
gendifplot1.1 <- plot(mms1.1, vline = 0.5) + facet_wrap(~BY, ncol = 2L) +
  ggplot2::geom_point(aes(x = estimate, y = level), size = 1.2) +
  ggplot2::geom_errorbar(aes(xmin = lower, xmax = upper, y = level), 
                         width = 0) +

  # Set x-axis limits to center around 0.5 for the first two panels
  coord_cartesian(xlim = c(0.385, 0.615)) + 
  ggplot2::geom_text(aes(label = sprintf("%0.2f (%s)", estimate, as.character(sigmms1.1))),
                     colour = "black", size = 3.5, position = position_nudge(y = .5)) +
  geom_vline(xintercept = 0.5, linetype = "solid", color = "grey42", linewidth = 0.1) +
  theme(legend.position="none") +
  theme(panel.grid.major = element_line(color = "grey82"))

gendifplot1.1 

diff_mms1.2 <- diff_mms1.1[seq(2, nrow(diff_mms1.1), by = 2), ] # Only picking every second attribute

# View(diff_mms1.2)

bwplot <- plot(diff_mms1.2) + ggplot2::facet_wrap(~BY, ncol = 3L)

bwplot1 <- bwplot + theme(panel.grid.major = element_line(color = "grey82"), panel.grid.minor = element_line(color = "grey92"))

bwplot2 <- bwplot1 + theme(legend.title = element_blank())


bwplot2 <-bwplot2 + 
  coord_cartesian(xlim = c(-0.07, 0.07)) + 
  ggplot2::geom_point(aes(x = estimate, y = level), size = 1.2) +
  ggplot2::geom_errorbar(aes(xmin = lower, xmax = upper, y = level), 
                         width = 0) + 
  ggplot2::geom_text(aes(label = sprintf("%0.2f (%s)", estimate, as.character(sigmms1.2))),
                     colour = "black", size = 3.5, position = position_nudge(y = .5), parse = FALSE) +
  geom_vline(xintercept = 0.0, linetype = "solid", color = "grey42", linewidth = 0.1) +
  theme(legend.position="none") + theme(panel.grid.major = element_line(color = "grey82"))

GenddiffplotAB <- plot_grid(gendifplot1.1, bwplot2, labels = "AUTO")
GenddiffplotAB

# Figure 3 clean 

gendifplot1.1 <- plot(mms1.1, vline = 0.5) +
  facet_wrap(~BY, ncol = 2L) +
  
  # beefier, solid circles
  geom_point(
    aes(x = estimate, y = level),
    shape  = 16,
    size   = 2.5,
    stroke = 0.8
  ) +
  
  # thicker horizontal bars
  geom_errorbar(
    aes(xmin = lower, xmax = upper, y = level),
    width = 0,
    size  = 0.8
  ) +
  
  # bold labels, nudged up
  geom_text(
    aes(label = sprintf("%0.2f (%s)", estimate, sigmms1.1),
        x     = estimate, y = level),
    size     = 4.5,
    fontface = "bold",
    colour   = "black",
    position = position_nudge(y = 0.6)
  ) +
  
  # zoom in around 0.5
  coord_cartesian(xlim = c(0.385, 0.615)) +
  
  # reference line
  geom_vline(
    xintercept = 0.5,
    linetype   = "solid",
    color      = "grey42",
    size       = 0.3
  ) +
  
  # white background + grey major grid
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    plot.background  = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_line(color = "grey82"),
    axis.text.y      = element_text(size = 12, face = "bold"),
    axis.text.x      = element_text(size = 12),
    axis.title       = element_text(size = 14, face = "bold"),
    legend.position  = "none",
    strip.text       = element_text(face = "bold")   # <— makes “Female”/“Male” bold
  )


# Preview
print(gendifplot1.1)

diff_mms1.2 <- diff_mms1.1[seq(2, nrow(diff_mms1.1), by = 2), ] 

bwplot2 <- plot(diff_mms1.2) +
  facet_wrap(~BY, ncol = 3L) +
  
  # beefier solid circles
  geom_point(
    aes(x = estimate, y = level),
    shape  = 16,
    size   = 2.5,
    stroke = 0.8
  ) +
  
  # thicker horizontal bars
  geom_errorbarh(
    aes(y    = level, xmin = lower, xmax = upper),
    height = 0,
    size   = 0.8
  ) +
  
  # bold labels, nudged up
  geom_text(
    aes(
      label = sprintf("%0.2f (%s)", estimate, sigmms1.2),
      x     = estimate, y = level
    ),
    size     = 4.5,
    fontface = "bold",
    colour   = "black",
    position = position_nudge(y = 0.6)
  ) +
  
  # reference line
  geom_vline(
    xintercept = 0.0,
    linetype   = "solid",
    colour     = "grey42",
    size       = 0.3
  ) +
  
  # zoom in
  coord_cartesian(xlim = c(-0.07, 0.07)) +
  
  # minimal white background + grey grid
  theme_minimal(base_size = 14) +
  theme(
    panel.background = element_rect(fill = "white", colour = NA),
    plot.background  = element_rect(fill = "white", colour = NA),
    panel.grid.major = element_line(color = "grey82"),
    axis.text.y      = element_text(size = 12, face = "bold"),
    axis.text.x      = element_text(size = 12),
    axis.title       = element_text(size = 14, face = "bold"),
    legend.position  = "none",
    strip.text       = element_text(face = "bold")   # <— makes “Female”/“Male” bold
  )

# Preview
print(bwplot2)


GenddiffplotAB <- plot_grid(gendifplot1.1, bwplot2, labels = "AUTO")
GenddiffplotAB


# (Optional) save as 300 dpi TIFF
ggsave(
  "Figure3.tif",
  plot   = GenddiffplotAB,
  width  = 14,
  height = 7,
  dpi    = 300,
  bg     = "white"
)

### Appendix 

## Diagnostics

#carry over 
fx <- Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work 
plot(cj(Data, fx, id = ~ResponseId, by = ~Choice_Number, estimate = "mm"), group = "Choice_Number", vline = 0.5)


#frequency
plot(cj_freqs(Data, fx, id = ~ResponseId))


#balance test
Data$profile_fac <- factor(Data$Profile)
plot(cj(Data, fx, id = ~ResponseId, by = ~profile_fac, estimate = "mm"), group = "profile_fac", vline = 0.5)


## Robustness Tests

#tables for appendix
write.xlsx(mmoverall, "mm_overall.xlsx", rowNames = FALSE)
write.xlsx(mms1.1, "mm_subgroups.xlsx", rowNames = FALSE)


# nested model
multiICTGen <- lmer(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work  + (1 | ResponseId), data=Data)
summary(multiICTGen)

multiICTGenfixedprofiles <- lmer(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + 
                       Entrepreneurial_Tasks + Family_Friendliness + Salary_Expectations + Meaningful_Work + 
                       factor(Combined_Profile)+ (1 | ResponseId), data = Data)
summary(multiICTGenfixedprofiles)

feols(
  Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + 
    Entrepreneurial_Tasks + Family_Friendliness + Salary_Expectations + Meaningful_Work + Combined_Profile,
  data = Data
)

#log regerssion -> same results
results_overeall_log <- glm(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work, data=Data, family = binomial)
summary(results_overeall_log)

#with clustered AMCEs and graphs for overall (with and without clusters = same thing)
results_AMCES <- amce(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + 
                        Entrepreneurial_Tasks + Family_Friendliness + Salary_Expectations + Meaningful_Work, data=Data, na.ignore = TRUE, cluster=TRUE)
summary(results_AMCES)


## Relative Import experimenting with different subsets of attributes

# split data by Gender
fOccGenSeg1  <- subset(OccGenSeg1, var_gen != "0")
mOccGenSeg1 <- subset(OccGenSeg1, var_gen != "1")

# Full model with all variables female
full_modelf <- lm(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work , data= fOccGenSeg1)
summary(full_modelf)

# Female Model with the first five variables
skilreq_model1f <- lm(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks, data = fOccGenSeg1)
summary(skilreq_model1f)

# Model with the last three variables
wpchar_model2f <- lm(Choice ~ Family_Friendliness  + Salary_Expectations + Meaningful_Work, data = fOccGenSeg1)
summary(wpchar_model2f)

# Full model with all variables male
full_modelm <- lm(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work , data=mOccGenSeg1)
summary(full_modelm)

# Male Model with the first five variables
model1m <- lm(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks, data = mOccGenSeg1)
summary(model1m)

# Male Model with the last three variables
model2m <- lm(Choice ~ Family_Friendliness + Salary_Expectations + Meaningful_Work, data = mOccGenSeg1)
summary(model2m)

# Fit a linear regression model by subgroup
fitf <- lm(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work, data=fOccGenSeg1)

fitm <- lm(Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work , data=mOccGenSeg1)

# Calculate relative importance of predictors
relimpf <- calc.relimp(fitf, type = "lmg", importance = TRUE)
relimpm <- calc.relimp(fitm, type = "lmg", importance = TRUE)

# View the structure of the relimp object
str(relimpf)
str(relimpm)
# Plot the relative importance of predictors
plot(relimpf)

plot(relimpm)


boot_relf <- boot.relimp(fitf, type = "lmg", nboot = 1000)

boot_evalf <- booteval.relimp(boot_relf)
plot(boot_evalf)


boot_relm <- boot.relimp(fitm, type = "lmg", nboot = 1000)

boot_evalm <- booteval.relimp(boot_relm)
plot(boot_evalm)



## Sample Robustness - No Sek C no Real

# add classes to conjoint dataset
ICTdata<- Data

# Use grepl to find rows where var_class contains the letter "c"
pattern <- "c"

pattern1 <- "R"
# Subset the data frame to exclude those rows
ICTdata_filtered <- ICTdata[!grepl(pattern, ICTdata$var_class), ]


#View(ICTdata_filtered)

ICTdata_filtered <- ICTdata_filtered[!grepl(pattern1, ICTdata_filtered$var_class), ]

# Print the filtered data frame
print("Filtered Data Frame:")
print(ICTdata_filtered)
#View(ICTdata_filtered)


ICTdata_filtered_byRealandSekc <- ICTdata_filtered

#View(ICTdata_filtered_byRealandSekc)


col_names <- names(ICTdata_filtered_byRealandSekc)
ICTdata_filtered_byRealandSekc[,col_names] <- lapply(ICTdata_filtered_byRealandSekc[,col_names] , factor)
ICTdata_filtered_byRealandSekc$Choice <- as.character(ICTdata_filtered_byRealandSekc$Choice)
ICTdata_filtered_byRealandSekc$Choice <- as.numeric(ICTdata_filtered_byRealandSekc$Choice)



## 1. Conjoint Analysis

## 1.1. Overall sample

# 1.1.1 Determining marginal means + the cj command with estimate mm does the same thing

mmoverallnoRC <- mm(ICTdata_filtered_byRealandSekc, Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work, id = ~ResponseId, h0 = 0.5) 

dev.off()

#View(mmoverall)

mmsigovnoRC <- c('*', '*', '*', '*', '*', '*', 'ns', 'ns','ns', 'ns', '*', '*', '*', '*', '*', '*') #added by hand
mmoverallnoRC$mmsigovnoRC = mmsigovnoRC

#View(mmoverall)
print(mmoverallnoRC)

# 1.1.2 Overall plot -> Figure 1

mmoverallplotnoRC <- plot(mmoverallnoRC, vline = 0.5)+
  ggplot2::geom_text(aes(label= sprintf("%0.2f (%s)", estimate, as.character(mmsigovnoRC))),
                     colour = "black", size = 3, position = position_nudge(y = .5)) +
  theme(legend.position="none") +
  geom_vline(xintercept = 0.5, linetype = "solid", color = "grey52", linewidth = 0.1) +
  theme(panel.grid.major = element_line(color = "grey82"), panel.grid.minor = element_line(color = "grey82"))

mmoverallplotnoRC


## 1.2 Gender subsamples

# 1.2.1 Determining marginal means and differences by Gender

mms1.1noRC <- cj(ICTdata_filtered_byRealandSekc, Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + 
                   Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + Salary_Expectations + Meaningful_Work, id = ~ResponseId,estimate = "mm", by =~var_gen, alpha = 0.05, h0 = 0.5)

diff_mms1.1noRC <- cj(ICTdata_filtered_byRealandSekc, Choice ~ IT_Reliance + Social_Interactions + Creative_Tasks + Routinized_Tasks + Entrepreneurial_Tasks + Family_Friendliness  + 
                        Salary_Expectations + Meaningful_Work, id = ~ResponseId, estimate = "mm_diff", by =~var_gen, alpha = 0.05, h0 = 0.0)

#View(mms1.1)
#View(diff_mms1.1)

# 1.2.2 Assign "*" if mms or diffs are significant at 95%-level

sigmms1.1noRC <- character(nrow(mms1.1noRC)) 

for (i in 1:nrow(mms1.1noRC)) {
  if (mms1.1noRC$p[i] > 0.05) {
    sigmms1.1noRC[i] <- "ns"  
  } else {
    sigmms1.1noRC[i] <- "*"  
  }
}
View(mms1.1noRC)

mms1.1noRC$sigmms1.1noRC = sigmms1.1noRC


sigmms1.2noRC <- character(nrow(diff_mms1.1noRC)) 

for (i in 1:nrow(diff_mms1.1noRC)) {
  if (diff_mms1.1noRC$p[i] > 0.05) {
    sigmms1.2noRC[i] <- "ns"  # Assign "*" if the condition is met
  } else {
    sigmms1.2noRC[i] <- "*"  # Assign "n" if the condition is not met
  }
}


diff_mms1.1$sigmms1.2noRC = sigmms1.2noRC

View(diff_mms1.1noRC)


# 1.2.3 Plot the conjoint analysis differences -> Figure 2

gendifplot1.1noRC <- plot(mms1.1noRC, vline = 0.5) + facet_wrap(~BY, ncol = 2L) +
  coord_cartesian(xlim = c(0.385, 0.615)) + 
  ggplot2::geom_text(aes(label = sprintf("%0.2f (%s)", estimate, as.character(sigmms1.1noRC))),
                     colour = "black", size = 3, position = position_nudge(y = .5)) +
  geom_vline(xintercept = 0.5, linetype = "solid", color = "grey42", linewidth = 0.1) +
  theme(legend.position="none") +
  theme(panel.grid.major = element_line(color = "grey82"))

gendifplot1.1noRC

diff_mms1.2noRC <- diff_mms1.1noRC[seq(2, nrow(diff_mms1.1noRC), by = 2), ] # Only picking every second attribute

# View(diff_mms1.2)

bwplotnoRC <- plot(diff_mms1.2noRC) + ggplot2::facet_wrap(~BY, ncol = 3L)

bwplot1noRC <- bwplotnoRC + theme(panel.grid.major = element_line(color = "grey82"), panel.grid.minor = element_line(color = "grey92"))

bwplot2noRC <- bwplot1noRC + theme(legend.title = element_blank())
bwplot2noRC <-bwplot2noRC + 
  coord_cartesian(xlim = c(-0.07, 0.07)) + 
  ggplot2::geom_text(aes(label = sprintf("%0.2f (%s)", estimate, as.character(sigmms1.2noRC))),
                     colour = "black", size = 3, position = position_nudge(y = .5), parse = FALSE) +
  geom_vline(xintercept = 0.0, linetype = "solid", color = "grey42", linewidth = 0.1) +
  theme(legend.position="none") + theme(panel.grid.major = element_line(color = "grey82"))

GenddiffplotABnoRC <- plot_grid(gendifplot1.1noRC, bwplot2noRC, labels = "AUTO")
GenddiffplotABnoRC
